***************************************************************
* .do-file to compute some first stage parameters of models  
***************************************************************

clear all
set maxvar 7000

use "$psidwkddata/famind_combined_adj_sing.dta", clear

// generate personal identifier
gen uniqueid_ref = (famid68*1000) + persid if ref    == 1
gen uniqueid_sp  = (famid68*1000) + persid if spouse == 1

gen uniqueid     = uniqueid_ref if ref == 1
replace uniqueid = uniqueid_sp  if spouse == 1

// only keep reference person and spouses
keep if uniqueid != .

/////////////////////////////////
/////// COUNT OBSERVATIONS //////
/////////////////////////////////

count
tab single, m

codebook uniqueid if single == .
codebook uniqueid if single == 0
codebook uniqueid if single == 1

// distribution of education
tab sexind educind_cat [aw = weightind] , row nofreq

/////////////////////////////////
/////// INITIAL CONDITIONS //////
/////////////////////////////////

preserve
keep if ref == 1 | spouse == 1
keep if ageav > 29 & ageav < 31

// marital status by gender and education
bysort sexind educind_cat: tab single [aw = weightind], m

// who is married to whom (by education)
keep if couple != .
keep educind_cat famid spouse ref wave sexind famweight

tab ref sexind 		// all men are ref, all women are spouse
tab spouse sexind

gen id = famid*100+wave

gen     ind = "ref" if ref == 1
replace ind = "spouse" if spouse == 1

keep educind_cat id ind famweight

reshape wide educind_cat famweight, i(id) j(ind) string

tab educind_catref educind_catspouse [aw = famweightref], row nofreq
tab educind_catspouse educind_catref [aw = famweightref], row nofreq
restore

//////////// WHO IS MARRIED TO WHOM? (FOR MARRIAGE MARKET) /////////

// note: those not merged: individual age (ageind) is below 30 or above 65

// note: data is produced in file 5_estimate_incomeprocess
merge 1:1 uniqueid wave using "$resultpath/incomeresid.dta"
drop _merge

sum resid11 if educcat == 0  [aw = weightind], det  // cutoff 25%: -0.33,  75%: 0.49
sum resid11 if educcat == 1  [aw = weightind], det  // cutoff 25%: -0.36,  75%: 0.52

sum resid12 if educcat == 0   [aw = weightind], det  // cutoff 25%: -0.48, 75%: 0.73
sum resid12 if educcat == 1   [aw = weightind], det  // cutoff 25%: -0.49, 75%: 0.78

// create variable for productivity of husband and wife (on family level)
bysort famid wave: egen prod_hsb = max(resid11)
bysort famid wave: egen prod_wf  = max(resid12)

preserve
keep if ref == 1   // on household level, therefore only keep reference persons

// categorial varialbe for productivity (three states)
gen prod_wf_cat = . 
gen prod_hsb_cat = .

replace prod_hsb_cat = 1 if prod_hsb <= -0.33 & educcat ==0 
replace prod_hsb_cat = 2 if prod_hsb > -0.33  & prod_hsb <= 0.49 & educcat ==0
replace prod_hsb_cat = 3 if prod_hsb > 0.49  & educcat ==0

replace prod_hsb_cat = 1 if prod_hsb <= -0.36 & educcat ==1
replace prod_hsb_cat = 2 if prod_hsb > -0.36  & prod_wf <= 0.52 & educcat ==1
replace prod_hsb_cat = 3 if prod_hsb > 0.52  & educcat ==1

replace prod_wf_cat = 1 if prod_wf <= -0.48 & educcatsp ==0 
replace prod_wf_cat = 2 if prod_wf > -0.48  & prod_wf <= 0.73 & educcatsp ==0
replace prod_wf_cat = 3 if prod_wf > 0.73  & educcatsp ==0

replace prod_wf_cat = 1 if prod_wf <= -0.49 & educcatsp ==1 
replace prod_wf_cat = 2 if prod_wf > -0.49  & prod_wf <= 0.78 & educcatsp ==1
replace prod_wf_cat = 3 if prod_wf > 0.78  & educcatsp ==1

// matching in terms of productivity
bysort prod_wf_cat educcatsp: tab prod_hsb_cat educcat [aw = weightind] 
bysort prod_hsb_cat educcat: tab prod_wf_cat educcatsp [aw = weightind]

restore



//////////// EQUIVALENCE SCALES IN PSID /////////

**** FIRST ADULT: 1, OTHER ADULTS: 0.7, CHILDREN: 0.5 ****

gen numadult = hhmemb - numchild 

// collapse on hh level 
collapse(max) single couple ageref hhmemb numchild numadult famweight, by(wave famid)

// single men
foreach num of numlist 30/65 {
sum numchild if single == 0 & ageref == `num'  [aweight = famweight], det
gen num_`num' = r(mean)*0.5
sum numadult if single == 0 & ageref == `num'  [aweight = famweight], det
replace num_`num' = num_`num'+ 1 + (r(mean)-1)*0.7 
}

// single women 
foreach num of numlist 30/65 {
sum numchild if single == 1 & ageref == `num'  [aweight = famweight], det
gen nuf_`num' = r(mean)*0.5
sum numadult if single == 1 & ageref == `num'  [aweight = famweight], det
replace nuf_`num' = nuf_`num'+1 + (r(mean)-1)*0.7
}

// couples
foreach num of numlist 30/65 {
sum numchild if couple != . & ageref == `num'  [aweight = famweight], det
gen nuc_`num' = r(mean)*0.5
sum numadult if couple != . & ageref == `num'  [aweight = famweight], det
replace nuc_`num' = nuc_`num'+1 + (r(mean)-1)*0.7
}

/// Plot household sizes by family type over the life-cycle (Figure xx) ////

// prepare data
keep nuf* num* nuc*
drop numchild numadult
duplicates drop 

gen id = 1
reshape long num_ nuc_ nuf_, i(id) j(age)
drop id

	tw  (line nuc_ age, lc(black) lw(medium)) ///
		(line num_ age, lp(dot) lc(black) lw(thick)) ///
		(line nuf_ age, lp(dash)  lc(black) lw(medium)) ,  ///
		xtitle("Age", siz(vlarge)) ytitle("Average HH Size", size(vlarge))  ///
		legend(order(1 "Couples" 2 "Men" 3 "Women") size(vlarge) ///
		position(6) col(4) region(lcolor(white)) stack) ///
		graphregion(color(white)) xlabel(30(10)65,labsize(vlarge)) ylabel(1(1)3,labsize(vlarge))
		graph export "$resultpath/equivalence_scales.eps", replace
		graph export "$resultpath/equivalence_scales.pdf", replace
		